# formulaFunctions.R
# created 2012 July 13

# Functions for working with objects of class "formula" or "Formula"

getLMFormula <- function(IVformula) {
  # Takes a formula from an ivreg() regression, "y ~ regressors | instruments",
  # and returns the formula "y ~ regressors", which can be plugged into lm().
  # This makes it easy to estimate the OLS counterparts of IV regressions.
  # [2012 02 10]
  #
  # Args:
  #   IVformula: object of class "formula" whose right-hand side contains "|".
  #
  # Returns:
  #   A formula with the same left-hand side as IVformula and, on the
  #   right-hand side, only the text that precedes the first "|".
  #
  # Stops if IVformula is not a formula, or if its right-hand side has no "|".
  if (!inherits(IVformula, 'formula')) {
    stop("IVformula needs to be of class formula.")
  }
  depvar  <- as.character(IVformula[2])
  indvars <- as.character(IVformula[3])

  # regexpr() returns -1 when nothing matches, so the match position itself
  # has to be tested -- before 1 is subtracted to get the end of the
  # regressor part.
  bar.pos <- as.integer(regexpr('|', indvars, fixed = TRUE))
  if (bar.pos == -1L) {
    stop('Cannot find "|" in the ivreg() formula.')
  }
  indvars <- substr(indvars, 1, bar.pos - 1L)
  as.formula(paste(depvar, '~', indvars))
}

getReducedForm <- function(IVformula) {
  # Takes a Formula used for IV regression and returns the reduced form: the
  # first left-hand-side part of IVformula on the left, and the second
  # right-hand-side part of IVformula on the right.  [2012 07 13]
  #
  # Args:
  #   IVformula: object of class "Formula" whose "rhs" attribute has exactly
  #              two parts.
  #
  # Returns:
  #   A formula of the form  lhs[[1]] ~ rhs[[2]].
  #
  # Stops if IVformula is not a Formula or if "rhs" does not have two parts.
  if (!inherits(IVformula, 'Formula')) {
    stop("IVformula needs to be of class Formula.")
  }
  rhs.parts <- attr(IVformula, 'rhs')
  if (length(rhs.parts) != 2) {
    stop("Something is wrong: IVformula doesn't have length 2.")
  }
  outcome <- attr(IVformula, 'lhs')[[1]]
  # Assemble the unevaluated call  outcome ~ rhs.parts[[2]]  and let formula()
  # turn it into a formula object.
  formula(as.call(list(as.name('~'), outcome, rhs.parts[[2]])))
}

getFirstStage <- function(IVFormula, endog = 'HSgrad', exclude.instruments = FALSE) {
  # Takes a Formula used for IV regression.  Returns an object of class
  # formula that specifies the first-stage regression: endog on the left, and
  # the second right-hand-side part of IVFormula on the right.  [2012 07 19]
  #
  # Args:
  #   IVFormula:           object of class "Formula" whose "rhs" attribute
  #                        has exactly two parts.
  #   endog:               text placed on the left-hand side of the result.
  #   exclude.instruments: if TRUE, delete from the right-hand side every
  #                        piece of text made of "CA" or "CL", one or more
  #                        letters/digits/underscores/dots, whitespace, and
  #                        then "*" or "+" (for example "CA.fac +" or
  #                        "CL.foo *").
  #
  # Returns:
  #   A formula whose environment is the global environment.
  #
  # NOTE(review): the pattern needs at least one character after "CA"/"CL"
  # and an operator after the name, so a bare "CA" term, or an instrument
  # that is the last term, is left in place -- confirm that this is intended.
  if (!inherits(IVFormula, 'Formula')) {
    stop("IVFormula needs to be of class Formula.")
  }
  rhs.parts <- attr(IVFormula, 'rhs')
  if (length(rhs.parts) != 2) {
    stop("Something is wrong: IVFormula doesn't have length 2.")
  }

  # deparse() may return several lines for a long expression; glue them back
  # into a single string.
  rhs.text <- paste(deparse(rhs.parts[[2]]), collapse = '')
  if (exclude.instruments) {
    rhs.text <- gsub('C[AL][\\.\\w]+\\s+[*\\+]', '', rhs.text, perl = TRUE)
  }

  first.stage <- as.formula(paste(endog, '~', rhs.text))
  environment(first.stage) <- globalenv()
  first.stage
}

checkIVModel <- function (modName, mod, endogVarNames) {
  # Checks that the two stages of an IV model are specified consistently:
  #
  #   1. Every second-stage term other than those named in endogVarNames
  #      must also be included in the first stage.
  #   2. Every first-stage term other than an instrument must also be
  #      included in the second stage.  A term is treated as an instrument
  #      when it starts with "CA" or "CL" (optionally preceded by an opening
  #      parenthesis) or when it contains "CSLGrad".
  #
  # Args:
  #   modName:       label for the model; used only in messages.
  #   mod:           object of class "Formula" whose "rhs" attribute has
  #                  exactly two parts (second stage first, first stage
  #                  second).
  #   endogVarNames: character vector of second-stage terms that are exempt
  #                  from check 1.
  #
  # Returns:
  #   NULL, invisibly, when both checks pass.  When a check fails, an
  #   explanation wrapped to 72 columns is written to standard output and an
  #   error is raised.
  #
  # Terms are compared as text: each right-hand-side part is deparsed and
  # split on "+".  Don't use all.vars() here.  It doesn't account for
  # interaction terms.  For example, if it sees a:b, it just reports that a
  # and b are in the model -- not that a:b is.  For this reason, relying on
  # all.vars() when checking IV model specification can lead to dangerous
  # oversights.  [2012 07 23]
  if (!inherits(mod, 'Formula')) {
    stop(modName, " must be of class Formula.")
  }
  rhs.parts <- attr(mod, 'rhs')
  if (length(rhs.parts) != 2) {
    stop("Something is wrong: ", modName, " doesn't have length 2.")
  }

  # Prints msg wrapped to 72 columns.  stringr is used through its namespace
  # when it is installed (it is not attached to the search path); otherwise
  # base strwrap() is used, so that a missing package cannot hide the real
  # problem behind a "could not find function" error.
  printWrapped <- function(msg) {
    if (requireNamespace('stringr', quietly = TRUE)) {
      writeLines(stringr::str_wrap(msg, 72, exdent = 2))
    } else {
      writeLines(strwrap(msg, width = 72, exdent = 2))
    }
  }

  # Splits one right-hand-side part into its "+"-separated terms.
  splitTerms <- function(part) {
    vars <- strsplit(paste0(deparse(part), collapse = ' '), '\\s*\\+\\s*')[[1]]
    # Sometimes, deparsing transforms "poly(x, 1)" to "poly(x,   1)".  This
    # line fixes that problem.  [2012 10 28]
    gsub('(poly\\(\\w+,)\\s+', '\\1 ', vars)
  }

  SecondStageVars <- splitTerms(rhs.parts[[1]])
  FirstStageVars  <- splitTerms(rhs.parts[[2]])

  # Check 1: exogenous second-stage terms must appear in the first stage.
  exogenous <- SecondStageVars[!SecondStageVars %in% endogVarNames]
  if (!all(exogenous %in% FirstStageVars)) {
    printWrapped(
      paste0("In ", modName, ", some second-stage variable other than (", paste(endogVarNames, collapse=", "), ") hasn't been included in the first stage.")
    )
    stop("see immediately above for details.")
  }

  # Check 2: first-stage terms that are not instruments must appear in the
  # second stage.
  nonInstruments <- FirstStageVars[!grepl('^\\(?(C[AL])|CSLGrad', FirstStageVars)]
  if (!all(nonInstruments %in% SecondStageVars)) {
    printWrapped(
      paste0("In ", modName, ", some first-stage variable other than a CA or a CL variable hasn't been included in the second stage.")
    )
    stop("see immediately above for details.")
  }
  invisible(NULL)
}

removeVarFromFormula <- function(formula, varName) {
  # It's best to remove variables from formulas with update.formula().  But
  # I sometimes want to remove a variable from a formula from within a
  # function, when I have only the name of the variable that I want to
  # remove.  update.formula() cannot handle such situations.  This function
  # can.  [2012 10 25]
  #
  # Args:
  #   formula: object of class "formula".
  #   varName: character vector.  Each element is used as a regular
  #            expression (so "(", ")" and other special characters must be
  #            escaped) and is matched against the right-hand side of
  #            formula.  A match counts only when it is not directly
  #            preceded or followed by a letter, digit, "." or "_", so that
  #            "x" does not match inside "x1" and "age" does not match inside
  #            "wage".  For each element, the first match is removed,
  #            together with a "+" that follows it.
  #
  # Returns:
  #   A formula with the same left-hand side and the reduced right-hand
  #   side; "1" is used as the right-hand side if nothing is left.  The
  #   environment of the result is the global environment.
  if (!inherits(formula, 'formula')) {
    stop("formula needs to be of class formula.")
  }
  if (!is.character(varName)) {
    stop("varName needs to be of class character")
  }
  depvar  <- as.character(formula[2])
  indvars <- as.character(formula[3])

  # An empty pattern would match between terms and eat a "+", so skip it.
  for (v in varName[nzchar(varName)]) {
    pattern <- paste0('(?<![[:alnum:]._])(?:', v, ')(?![[:alnum:]._])\\s?\\+?')
    indvars <- sub(pattern, '', indvars, perl = TRUE)
  }
  indvars <- sub('\\s*\\+\\s*$', '', indvars)  # eliminate plus signs at the end of the formula

  # Removing the only term leaves nothing to parse; keep the intercept.
  if (!grepl('\\S', indvars)) {
    indvars <- '1'
  }
  result <- as.formula(paste(depvar, '~', indvars))
  environment(result) <- .GlobalEnv
  result
}
    
addVarsToFormula <- function(formula, varNames) {
  # It's best to add variables to formulas with update.formula().  But
  # I sometimes have only the names of the variables, and
  # update.formula() has trouble dealing with strings.  This function
  # handles such situations.  [2012 11 21]
  #
  # Args:
  #   formula:  object of class "formula".
  #   varNames: character vector of terms to append, with "+", to the
  #             right-hand side of formula.  Empty strings are ignored.
  #
  # Returns:
  #   A formula with the same left-hand side and the extended right-hand
  #   side.  If varNames holds nothing to add, the right-hand side is left
  #   as it was.  The environment of the result is the global environment.
  if (!inherits(formula, 'formula')) {
    stop("formula needs to be of class formula.")
  }
  if (!is.character(varNames)) {
    stop("varNames needs to be of class character")
  }
  depvar  <- as.character(formula[2])
  indvars <- as.character(formula[3])

  # Joining the existing right-hand side and the new names in one paste()
  # means that an empty varNames cannot leave a dangling "+" behind.
  indvars <- paste(c(indvars, varNames[nzchar(varNames)]), collapse = ' + ')
  result <- as.formula(paste(depvar, '~', indvars))
  environment(result) <- .GlobalEnv
  result
}
